% code to replicate Brady(2011)
% OLS regression of the dynamic panel model
% Use county level data from california realtors association (median house
% price)
% Read the raw panel and the spatial weighting matrix from Excel, cache
% them in realtor.mat, then restart from a clean workspace holding only
% the cached variables.
clear
data = xlsread('realtor.xls', 'Sheet1');
x = xlsread('weighting matrix_realtor.xlsx', 'Final', 'C3:AL38');  % 36-by-36 cell range
x(isnan(x)) = 0;      % replace NaN entries of the weighting matrix with zero
save('realtor.mat')   % no variable list given, so every workspace variable is stored
clear
load('realtor')       % my prepared data (previously saved as a .mat file)

% Panel dimensions used by the per-period loops further down.
T  = 143;   % time periods (months) per region, after losing one to the lag
nc = 36;    % number of regions

% Identifiers
dt  = data(:,4);    % date
reg = data(:,12);   % county numbers

% Housing prices and their lags
y  = data(:,17);    % housing prices (natural log)
y1 = data(:,18);    % housing prices lag
y2 = data(:,19);    % more housing lags
y3 = data(:,20);
y4 = data(:,21);

% Contemporaneous regressors
x1 = data(:,8);       % unemployment
x2 = data(:,22);      % construction (natural log)
x3 = data(:,10);      % realrate
x4 = data(:,23);      % population (natural log)
x5 = data(:,24);      % industrial production (natural log)
x6 = data(:,13:15);   % season dummies (3 columns)
x7 = data(:,45:79);   % county dummies (35 columns; nc = 36 regions)

% Lags of the regressors
% unemployment lags
x1_1 = data(:,29);
x1_2 = data(:,30);
x1_3 = data(:,31);
x1_4 = data(:,32);
% construction lags
x2_1 = data(:,25);
x2_2 = data(:,26);
x2_3 = data(:,27);
x2_4 = data(:,28);
% realrate lags
x3_1 = data(:,41);
x3_2 = data(:,42);
x3_3 = data(:,43);
x3_4 = data(:,44);
% population lags
x4_1 = data(:,33);
x4_2 = data(:,34);
x4_3 = data(:,35);
x4_4 = data(:,36);
% IP lags
x5_1 = data(:,37);
x5_2 = data(:,38);
x5_3 = data(:,39);
x5_4 = data(:,40);

% Housing price leads: 100 columns are read here.
% Original note: "+1 through +60 (the third column in leads is the current
% hprice)" -- NOTE(review): the stated horizon does not match the 100
% columns selected; confirm against the spreadsheet layout.
yleads = data(:,80:179);

% 2) Here I define the initial weighting matrix.  James Lesage does provide
% code for automating this part, but I chose to define the matrix by
% "hand."

% weighting matrix is from excel file

% Stack everything into one matrix ("allp") so that rows can be filtered
% and sorted together.  Column order: dt, reg, y, y1, x1..x5, x6 (3 cols),
% x7 (35 cols), yleads (100 cols), then the 23 lag columns.
lags = [y2 y3 y4, ...
        x1_1 x1_2 x1_3 x1_4, ...
        x2_1 x2_2 x2_3 x2_4, ...
        x3_1 x3_2 x3_3 x3_4, ...
        x4_1 x4_2 x4_3 x4_4, ...
        x5_1 x5_2 x5_3 x5_4];
allp = [dt reg y y1, ...
        x1 x2 x3 x4 x5, ...
        x6 x7, ...
        yleads lags];
n = size(allp,1);
m = size(allp,2);

% Here I rid the data matrix "allp" of the observations lost due to the lags
% used in estimation (i.e., if you decide on an AR(1), or AR(2) . . . ).
% Rows to drop are flagged by a 1 in the date column (column 1 of allp).
%
% The rows are removed with one logical-index deletion.  The earlier version
% deleted rows inside "for k = 1:n": each deletion shifts the following row
% into position k, which the loop then skips, so consecutive flagged rows
% were only partly removed.  It also needed a hard-coded "break" at row 5149
% (final sample size + 1) to avoid indexing past the shrinking matrix.
%
% NOTE(review): the original comment said the placeholder value was 0.0012,
% but the code has always tested for 1 -- confirm which marker the
% spreadsheet actually uses.
allp(allp(:,1) == 1, :) = [];


% Sort by date ("dt", column 1); this ordering is required by Lesage's code
% used below, and the per-period loops assume nc consecutive rows per period.
allp = sortrows(allp, 1);
[n, m] = size(allp);

% Redefine the variables from the cleaned, sorted matrix.  Log variables are
% multiplied by 100; unemployment (x1), realrate (x3) and the dummies are not.
dt = allp(:, 1);
y  = 100 * allp(:, 3);
y1 = 100 * allp(:, 4);
x1 = allp(:, 5);
x2 = 100 * allp(:, 6);
x3 = allp(:, 7);
x4 = 100 * allp(:, 8);
x5 = 100 * allp(:, 9);
x6 = allp(:, 10:12);              % season dummies
x7 = allp(:, 13:47);              % county dummies
yleads = 100 * allp(:, 48:147);   % the 100 lead columns
% Every lag column is scaled by 100 here, including the unemployment and
% realrate lags whose contemporaneous series are left unscaled.
% NOTE(review): confirm that this is intended.
lags = 100 * allp(:, 148:m);

% Rename the weighting matrix consistent with Lesage's code.
W = x;

% Lesage's code: "normw" and "slag" are from Lesage's spatial toolbox (see
% his website).  W1 is normw applied to the sparse weights; W is then
% replaced by slag(W1,2).
WW = sparse(W);
W1 = normw(WW);
W  = slag(W1, 2);

% Build the "spatial lag" variable (Wy in the literature) together with the
% spatially lagged regressors kept as instruments for IV.  The layout follows
% Paul Elhorst's "sar_panel.m" from Lesage's toolkit: the data are stacked by
% period, so rows t1:t2 hold the nc regions of period t and W is applied to
% one cross-section at a time.

% Preallocate the nc*T-by-1 outputs.
Wy    = zeros(nc*T, 1);
Wx1   = zeros(nc*T, 1);
Wx2   = zeros(nc*T, 1);
Wx4   = zeros(nc*T, 1);
Wx1_1 = zeros(nc*T, 1);
Wx2_1 = zeros(nc*T, 1);
Wy1   = zeros(nc*T, 1);

for t = 1:T
    t1 = 1 + (t-1)*nc;
    t2 = t*nc;

    % spatial lag of the dependent variable
    Wy(t1:t2, 1)    = sparse(W) * y(t1:t2, 1);

    % instruments
    Wx1(t1:t2, 1)   = sparse(W) * x1(t1:t2, 1);
    Wx2(t1:t2, 1)   = sparse(W) * x2(t1:t2, 1);
    Wx4(t1:t2, 1)   = sparse(W) * x4(t1:t2, 1);
    Wx1_1(t1:t2, 1) = sparse(W) * lags(t1:t2, 4);   % column 4 of lags = x1_1
    Wx2_1(t1:t2, 1) = sparse(W) * lags(t1:t2, 8);   % column 8 of lags = x2_1

    % spatial lag of the lagged dependent variable
    Wy1(t1:t2, 1)   = sparse(W) * y1(t1:t2, 1);
end

% Note: There are plenty of OLS or TSLS matlab programs out there (see Lesage's
% collection of code, for example). I include my own code *in* the program
% for my own satisfaction (as a way to keep track of things).
% You could easily cut the hand-written OLS code (lines 184 through 270 in the
% original file; the numbering may no longer match this version) and go
% straight to the code below that uses Lesage's hwhite(y,X) program.

% 3) OLS

% Kronecker products of an identity of size T with each weight matrix,
% i.e. one copy of the weights per time period on the block diagonal.
W_nt  = kron(eye(T), W);
W1_nt = kron(eye(T), W1);

% Regressor sets without a constant or county dummies, built up from the
% smallest to the largest.
X3 = [x1 x2 x3 x4 x5 x6];
X2 = [y1 X3];              % for sarar_panel_FE_LY(y,X2,W,W,30)
X1 = [Wy X2];

% The first SAR test: OLS without the spatial regressor.
% Regressors: constant, lagged price, x1..x5, season dummies, county dummies.
[n,m] = size(y);
cc = ones(n,1);
X = [cc y1 x1 x2 x3 x4 x5 x6 x7];
%X = [cc y1 x1  x3 x4 x5 x6 x7];
[n,k] = size(X);            % k counts every column of X, including the constant

XtXinv = inv(X'*X);         % computed once; reused for bhat and the covariance
bhat = XtXinv*(X'*y);
ybar = mean(y);
sst = (y-ybar)'*(y-ybar);   % total sum of squares
e_fs = y - X*bhat;          % residuals
y_OLS = X*bhat;             % fitted values
sse = e_fs'*e_fs/(n-k);     % residual variance estimate (RSS / degrees of freedom)

varco = sse*XtXinv;         % classical coefficient covariance
se = sqrt(diag(varco));
tstat = bhat./se;
pvalue = 2*(1 - tcdf(abs(tstat),n-k));   % two-sided p-values
OLS_results.y_OLS = y_OLS;
OLS_results.bhat = bhat;
OLS_results.e_fs = e_fs;
OLS_results.se = se;
OLS_results.tstat = tstat;
OLS_results.pvalue = pvalue;
OLS_results.r2 = 1-(n-k)*sse/sst; % Shulin Shen add the R-square: 1 - RSS/SST
% Adjusted R-square: 1 - (RSS/(n-k)) / (SST/(n-1)).  k already includes the
% constant, so the residual degrees of freedom are n-k (the previous
% version divided by n-k-2).
OLS_results.ad_r2 = 1 - (1 - OLS_results.r2)*(n-1)/(n-k);

% Keep a copy of this regression's output: OLS_results is reused (and its
% fields overwritten) by the regression that includes the spatial lag.
OLS_results_nospatial = OLS_results;



% The Second Sar Test: OLS with spatial regressor Shulin Shen write the OLS code
% Wy sits in column 2 of X, so bhat(2) is the spatial-lag coefficient.
% Note that x5 precedes x4 in this design matrix; the rows of bhat, se,
% tstat and pvalue follow that column order.
[n,m] = size(y);
cc = ones(n,1);
X = [cc Wy y1 x1 x2 x3 x5 x4 x6 x7];
%X = [cc Wy y1 x1  x3 x4 x5 x6 x7];
X1 = [Wy y1 x1 x2 x3 x4 x5 x6]; % for lr_f_err(y,X1,W,N); 
X2 = [y1 x1 x2 x3 x4 x5 x6]; % for lm_f_err(y,X2,W,N)
[n,k] = size(X);            % k counts every column of X, including the constant

XtXinv = inv(X'*X);         % computed once; reused for bhat and the covariance
bhat = XtXinv*(X'*y);
ybar = mean(y);
sst = (y-ybar)'*(y-ybar);   % Shulin Shen add the total sum of squares
e_fs = y - X*bhat;          % residuals
y_OLS = X*bhat;             % fitted values
sse = e_fs'*e_fs/(n-k);     % residual variance estimate (RSS / degrees of freedom)
varco = sse*XtXinv;         % classical coefficient covariance
se = sqrt(diag(varco));
tstat = bhat./se;
pvalue = 2*(1 - tcdf(abs(tstat),n-k));   % two-sided p-values
OLS_results.y_OLS = y_OLS;
OLS_results.bhat = bhat;
OLS_results.e_fs = e_fs;
OLS_results.se = se;
OLS_results.tstat = tstat;
OLS_results.pvalue = pvalue;
OLS_results.r2 = 1-(n-k)*sse/sst; % Shulin Shen add the R-square: 1 - RSS/SST
% Adjusted R-square: 1 - (RSS/(n-k)) / (SST/(n-1)).  k already includes the
% constant, so the residual degrees of freedom are n-k (the previous
% version divided by n-k-2).
OLS_results.ad_r2 = 1 - (1 - OLS_results.r2)*(n-1)/(n-k);

% Shulin Shen's White's Corrected Standard Errors
% Sandwich estimator inv(X'X) * (X' * diag(e.^2) * X) * inv(X'X), using the
% residuals e_fs and design matrix X of the most recent regression.

% The diagonal matrix of squared residuals is stored as sparse: a dense
% diag(e_fs.^2) is n-by-n and almost entirely zeros.
sigma = spdiags(e_fs.^2, 0, numel(e_fs), numel(e_fs));

XtXinv = inv(X'*X);   % computed once and used on both sides of the sandwich
varco_white_beta = XtXinv*(X'*sigma*X)*XtXinv;

se_white = sqrt(diag(varco_white_beta));

tstat_white = bhat./se_white;

OLS_results.pvalue_white = 2*(1 - tcdf(abs(tstat_white),n-k));


%White's Corrected Standard Errors (from Lesage's Toolbox)
% white_se = hwhite(y,X);
% white_tstat = white_se.tstat;
%calculate the white se's
% se_white = bhat./white_tstat;
% pvalue_white = 2*(1 - tcdf(abs(white_tstat),n-k));
% OLS_results.se_white = se_white;
% OLS_results.pvalue_white= pvalue_white;
% end of OLS 

% Summary table, one row per coefficient. Columns: estimate, White s.e.,
% White p-value, classical p-value, classical t-stat, classical s.e.
OLS_RE = [OLS_results.bhat  se_white  OLS_results.pvalue_white  OLS_results.pvalue  OLS_results.tstat  OLS_results.se];

% code from Brady 


%__________________________________________________________________________
%White's Corrected Standard Errors (from Lesage's Toolbox)
white_se    = hwhite(y, X);
white_tstat = white_se.tstat;
%testauto = wool(e_fs,X); %test for autocorrelation
%--------------------------------------------------------------------------

% Collect the output of the regression that includes Wy; the LM test below
% reads its inputs from this structure.  rho is the coefficient on Wy
% (row 2 of bhat).
results = struct( ...
    'k',      k, ...
    'n',      n, ...
    'X',      X, ...
    'y_pred', y_OLS, ...
    'sse',    sse, ...
    'bhat',   bhat, ...
    'rho',    bhat(2,:), ...
    'e',      e_fs, ...
    'se',     se, ...
    'tstat',  tstat, ...
    'pvalue', pvalue);

%calculate the white se's (this replaces any se_white computed earlier)
se_white     = bhat ./ white_tstat;
pvalue_white = 2*(1 - tcdf(abs(white_tstat), n-k));
%end calculation for those stats


%Calculate the Spatial statistics
T = 143;      % number of time periods: 180 - one lag
N = 36;       % number of regions
nobs = N*T;

%***************************************************************
%LM Test on residuals for the SAR model
%this program is pasted in from Lesage's program and modified for panel
%data

% Inputs taken from the SAR-OLS estimation stored in "results".
es   = results.e;            % residuals
rho  = results.rho;          % coefficient on Wy
rhot = results.tstat(2,1);   % t-statistic of rho
sige = results.sse;          % residual variance estimate

% recover variance of rho: s.e. = rho / t-stat, variance = s.e. squared
stdt = rhot/rho;
stdt = 1/stdt;
varr = stdt*stdt;

% NOTE: n is overwritten here with the row count of W.
[n, junk] = size(W);
W2 = sparse(W);
A  = speye(n) - rho*sparse(W);
AI = inv(A);

% Trace terms.  The commented lines are the cross-section versions; the
% active ones scale them by (T-1).
% NOTE(review): the residual loop below runs over T periods while the
% traces use T-1 -- confirm which factor is intended.
%T22 = trace(W2*W2 + W2'*W2);
%T21 = trace(W2*W*AI + W2'*W*AI);
T22 = (T-1)*trace(W2*W2 + W2'*W2);
T21 = (T-1)*trace(W2*W*AI + W2'*W*AI);

% Accumulate e_t' * W * e_t / sige over the periods.  This "for" statement
% modifies Lesage's code for panel data; template from Elhorst's sar_panel.m
lm1 = 0;
for t = 1:T
    t1 = 1 + (t-1)*N;
    t2 = t*N;
    lm1 = lm1 + (es(t1:t2,:)'*W*es(t1:t2,:))/sige;
end

Tterm = T22 - T21*T21*varr;
TI    = inv(Tterm);
lmerr = lm1*lm1*TI;
% NOTE(review): check whether chis_prb returns the upper-tail probability or
% the CDF in the toolbox version in use; "1 - chis_prb" is only a p-value in
% the latter case.
prob = 1 - chis_prb(lmerr, 1);

% Package the test output; chi1 = 6.635 matches the 1% critical value of a
% chi-square with 1 degree of freedom.
LMSARresult = struct( ...
    'meth', 'lmsar', ...
    'lm',   lmerr, ...
    'prob', prob, ...
    'chi1', 6.635, ...
    'nobs', nobs, ...
    'nvar', k);



